From 13b7091428060240e7a7701cb3bf783d6b705d2d Mon Sep 17 00:00:00 2001 From: =?utf8?q?=C3=98yvind=20Kol=C3=A5s?= Date: Fri, 9 Dec 2016 22:17:15 +0100 Subject: [PATCH] extensions/simple: reuse some fast paths for more component counts --- extensions/simple.c | 200 ++++++++++++++++++++++++++++++++++---------- 1 file changed, 158 insertions(+), 42 deletions(-) diff --git a/extensions/simple.c b/extensions/simple.c index 4755921..d03981d 100644 --- a/extensions/simple.c +++ b/extensions/simple.c @@ -4,29 +4,42 @@ int init (void); + static inline long -float_to_u8 (unsigned char *src_char, unsigned char *dst, long samples) +float_to_u8_x1 (unsigned char *src_char, unsigned char *dst, long samples) { float *src = (float *)src_char; long n = samples; while (n--) { float r = src[0]; - float g = src[1]; - float b = src[2]; - float a = src[3]; - dst[0] = (r >= 1.0f) ? 0xFF : ((r <= 0.0f) ? 0x0 : 0xFF * r + 0.5f); - dst[1] = (g >= 1.0f) ? 0xFF : ((g <= 0.0f) ? 0x0 : 0xFF * g + 0.5f); - dst[2] = (b >= 1.0f) ? 0xFF : ((b <= 0.0f) ? 0x0 : 0xFF * b + 0.5f); - dst[3] = (a >= 1.0f) ? 0xFF : ((a <= 0.0f) ? 
0x0 : 0xFF * a + 0.5f); - - dst += 4; - src += 4; + dst += 1; + src += 1; } return samples; } +static inline long +float_to_u8_x4 (unsigned char *src_char, unsigned char *dst, long samples) +{ + return float_to_u8_x1 (src_char, dst, samples * 4); +} + +static inline long +float_to_u8_x3 (unsigned char *src_char, unsigned char *dst, long samples) +{ + return float_to_u8_x1 (src_char, dst, samples * 3); +} + +static inline long +float_to_u8_x2 (unsigned char *src_char, unsigned char *dst, long samples) +{ + return float_to_u8_x1 (src_char, dst, samples * 2); +} + + + static inline long float_pre_to_u8_pre (unsigned char *src_char, unsigned char *dst, long samples) { @@ -59,7 +72,7 @@ float_pre_to_u8_pre (unsigned char *src_char, unsigned char *dst, long samples) } static inline long -float_to_u16 (unsigned char *src_char, unsigned char *dst_char, long samples) +float_to_u16_x1 (unsigned char *src_char, unsigned char *dst_char, long samples) { float *src = (float *)src_char; uint16_t *dst = (uint16_t *)dst_char; @@ -67,20 +80,27 @@ float_to_u16 (unsigned char *src_char, unsigned char *dst_char, long samples) while (n--) { float r = src[0]; - float g = src[1]; - float b = src[2]; - float a = src[3]; - dst[0] = (r >= 1.0f) ? 0xFFFF : ((r <= 0.0f) ? 0x0 : 0xFFFF * r + 0.5f); - dst[1] = (g >= 1.0f) ? 0xFFFF : ((g <= 0.0f) ? 0x0 : 0xFFFF * g + 0.5f); - dst[2] = (b >= 1.0f) ? 0xFFFF : ((b <= 0.0f) ? 0x0 : 0xFFFF * b + 0.5f); - dst[3] = (a >= 1.0f) ? 0xFFFF : ((a <= 0.0f) ? 
0x0 : 0xFFFF * a + 0.5f); - - dst += 4; - src += 4; + dst += 1; + src += 1; } return samples; } +static inline long +float_to_u16_x2 (unsigned char *src_char, unsigned char *dst_char, long samples) +{ + return float_to_u16_x1 (src_char, dst_char, samples * 2); +} +static inline long +float_to_u16_x3 (unsigned char *src_char, unsigned char *dst_char, long samples) +{ + return float_to_u16_x1 (src_char, dst_char, samples * 3); +} +static inline long +float_to_u16_x4 (unsigned char *src_char, unsigned char *dst_char, long samples) +{ + return float_to_u16_x1 (src_char, dst_char, samples * 4); +} static inline long float_pre_to_u16_pre (unsigned char *src_char, unsigned char *dst_char, long samples) @@ -146,7 +166,7 @@ float_pre_to_u32_pre (unsigned char *src_char, unsigned char *dst_char, long sam static inline long -float_to_u32 (unsigned char *src_char, unsigned char *dst_char, long samples) +float_to_u32_x1 (unsigned char *src_char, unsigned char *dst_char, long samples) { float *src = (float *)src_char; uint32_t *dst = (uint32_t *)dst_char; @@ -154,20 +174,30 @@ float_to_u32 (unsigned char *src_char, unsigned char *dst_char, long samples) while (n--) { float r = src[0]; - float g = src[1]; - float b = src[2]; - float a = src[3]; dst[0] = (r >= 1.0f) ? 0xFFFFFFFF : ((r <= 0.0f) ? 0x0 : 0xFFFFFFFF * r + 0.5f); - dst[1] = (g >= 1.0f) ? 0xFFFFFFFF : ((g <= 0.0f) ? 0x0 : 0xFFFFFFFF * g + 0.5f); - dst[2] = (b >= 1.0f) ? 0xFFFFFFFF : ((b <= 0.0f) ? 0x0 : 0xFFFFFFFF * b + 0.5f); - dst[3] = (a >= 1.0f) ? 0xFFFFFFFF : ((a <= 0.0f) ? 
0x0 : 0xFFFFFFFF * a + 0.5f); - dst += 4; - src += 4; + dst += 1; + src += 1; } return samples; } +static inline long +float_to_u32_x2 (unsigned char *src_char, unsigned char *dst_char, long samples) +{ + return float_to_u32_x1 (src_char, dst_char, samples * 2); +} +static inline long +float_to_u32_x3 (unsigned char *src_char, unsigned char *dst_char, long samples) +{ + return float_to_u32_x1 (src_char, dst_char, samples * 3); +} +static inline long +float_to_u32_x4 (unsigned char *src_char, unsigned char *dst_char, long samples) +{ + return float_to_u32_x1 (src_char, dst_char, samples * 4); +} + static inline long u32_to_float (unsigned char *src_char, unsigned char *dst_char, long samples) @@ -205,12 +235,42 @@ init (void) babl_conversion_new (babl_format ("R'G'B'A float"), babl_format ("R'G'B'A u8"), "linear", - float_to_u8, + float_to_u8_x4, NULL); babl_conversion_new (babl_format ("RGBA float"), babl_format ("RGBA u8"), "linear", - float_to_u8, + float_to_u8_x4, + NULL); + babl_conversion_new (babl_format ("R'G'B' float"), + babl_format ("R'G'B' u8"), + "linear", + float_to_u8_x3, + NULL); + babl_conversion_new (babl_format ("RGB float"), + babl_format ("RGB u8"), + "linear", + float_to_u8_x3, + NULL); + babl_conversion_new (babl_format ("Y'A float"), + babl_format ("Y'A u8"), + "linear", + float_to_u8_x2, + NULL); + babl_conversion_new (babl_format ("YA float"), + babl_format ("YA u8"), + "linear", + float_to_u8_x2, + NULL); + babl_conversion_new (babl_format ("Y float"), + babl_format ("Y u8"), + "linear", + float_to_u8_x1, + NULL); + babl_conversion_new (babl_format ("Y' float"), + babl_format ("Y' u8"), + "linear", + float_to_u8_x1, NULL); babl_conversion_new (babl_format ("R'aG'aB'aA float"), babl_format ("R'aG'aB'aA u8"), @@ -227,12 +287,43 @@ init (void) babl_conversion_new (babl_format ("R'G'B'A float"), babl_format ("R'G'B'A u16"), "linear", - float_to_u16, + float_to_u16_x4, NULL); babl_conversion_new (babl_format ("RGBA float"), babl_format ("RGBA 
u16"), "linear", - float_to_u16, + float_to_u16_x4, + NULL); + + babl_conversion_new (babl_format ("R'G'B' float"), + babl_format ("R'G'B' u16"), + "linear", + float_to_u16_x3, + NULL); + babl_conversion_new (babl_format ("RGB float"), + babl_format ("RGB u16"), + "linear", + float_to_u16_x3, + NULL); + babl_conversion_new (babl_format ("Y'A float"), + babl_format ("Y'A u16"), + "linear", + float_to_u16_x2, + NULL); + babl_conversion_new (babl_format ("YA float"), + babl_format ("YA u16"), + "linear", + float_to_u16_x2, + NULL); + babl_conversion_new (babl_format ("Y' float"), + babl_format ("Y' u16"), + "linear", + float_to_u16_x1, + NULL); + babl_conversion_new (babl_format ("Y float"), + babl_format ("Y u16"), + "linear", + float_to_u16_x1, NULL); babl_conversion_new (babl_format ("R'aG'aB'aA float"), babl_format ("R'aG'aB'aA u16"), @@ -250,12 +341,42 @@ init (void) babl_conversion_new (babl_format ("R'G'B'A float"), babl_format ("R'G'B'A u32"), "linear", - float_to_u32, + float_to_u32_x4, NULL); babl_conversion_new (babl_format ("RGBA float"), babl_format ("RGBA u32"), "linear", - float_to_u32, + float_to_u32_x4, + NULL); + babl_conversion_new (babl_format ("R'G'B' float"), + babl_format ("R'G'B' u32"), + "linear", + float_to_u32_x3, + NULL); + babl_conversion_new (babl_format ("RGB float"), + babl_format ("RGB u32"), + "linear", + float_to_u32_x3, + NULL); + babl_conversion_new (babl_format ("Y'A float"), + babl_format ("Y'A u32"), + "linear", + float_to_u32_x2, + NULL); + babl_conversion_new (babl_format ("YA float"), + babl_format ("YA u32"), + "linear", + float_to_u32_x2, + NULL); + babl_conversion_new (babl_format ("Y' float"), + babl_format ("Y' u32"), + "linear", + float_to_u32_x1, + NULL); + babl_conversion_new (babl_format ("Y float"), + babl_format ("Y u32"), + "linear", + float_to_u32_x1, NULL); babl_conversion_new (babl_format ("R'aG'aB'aA float"), babl_format ("R'aG'aB'aA u32"), @@ -274,31 +395,26 @@ init (void) "linear", u32_to_float_x2, NULL); - 
babl_conversion_new (babl_format ("Y'A u32"), babl_format ("Y'A float"), "linear", u32_to_float_x2, NULL); - babl_conversion_new (babl_format ("Y u32"), babl_format ("Y float"), "linear", u32_to_float, NULL); - babl_conversion_new (babl_format ("Y' u32"), babl_format ("Y' float"), "linear", u32_to_float, NULL); - babl_conversion_new (babl_format ("RGBA u32"), babl_format ("RGBA float"), "linear", u32_to_float_x4, NULL); - babl_conversion_new (babl_format ("R'G'B'A u32"), babl_format ("R'G'B'A float"), "linear", -- 2.30.2